
####################################################################################################
# R CODE FOR SIMULATION OF GLMDM, (c) Kyung, Casella, and Gill, February 2008
####################################################################################################

####################################################################################################
# SETUP UP DATA 
####################################################################################################
library(MASS); library(msm); library(mvtnorm); library(compositions); library(e1071) 
library(coda); library(LearnBayes); library(MCMCpack)

# LOAD DATA AND FIX PARAMETERS 
ssas.df <- read.table("http://jgill.wustl.edu/data/ssas.df",header=TRUE)
attach(ssas.df)
Y <- scotpar2
X <- cbind(rep(1,length=length(Y)),househld,rsex,rage,relgsums,ptyallgs,idlosem,marrmus,ukintnat,
	natinnat,voiceuk3,nhssat,hincdif2,unionsa,whrbrn,hedqual2)
detach(ssas.df)

# SET QUANTITIES FROM DATA
n <- nrow(X); K <- ncol(X)
beta <- runif(K,-3,3)
mu <- 0; tau <- 1; 

####################################################################################################
# SOLUTION FOR m
####################################################################################################
fm <-function(m.val){m.val*sum(1/seq(m.val,m.val+n-1,by=1))}
lower.m<-0.001
upper.m<-K*(n-1)/(n-K)
tol    <-0.01

getm<-function(n,K) {				
    while (lower.m+tol<upper.m){
	mstar = (lower.m+upper.m)/2
	if (fm(mstar)<K) lower.m=mstar
	else upper.m=mstar
    }		
    m <- mstar
    return(m)
}
m <- getm(n,K)
#cat(c("True K:", K),fill=TRUE)
#cat(c("True m:", m),fill=TRUE)

q <- rDirichlet.acomp(1, alpha=rep(m/K, K))
C <- rdiscrete(n, q)

A <- matrix(0,nrow=n,ncol=n)
done.vec <- rep(0,n)
for (i in 1:n)  {
     A[i,C[i]] <- 1
	    if (done.vec[i] == 0)  {
	        test.vec <- C; test.vec[test.vec != C[i]] <- 0; test.vec[test.vec == C[i]] <- 1
	    }
}

####################################################################################################
# RUN A GLM TO GET hat.beta and hat.var
####################################################################################################
glm.out  <- glm(Y ~ X[,-1],family=binomial(link=probit))
hat.beta <- coefficients(summary(glm.out))[,1]
hat.var  <- (coefficients(summary(glm.out))[,2])^2

####################################################################################################
# FIX PRIOR DISTRIBUTIONS AND PRIOR PARAMETERS
####################################################################################################
num.reps <- 5000;
#a<-1; b<-1
#a1 <- 3; b1 <- 2; d <- 5 
a1 <- 3; b1 <- 2; d <- 0.25
# For the prior of m, we use Gamma(a,b)
# Here ab=MM and ab^2=VV. 
MM <- 15
VV <- 30
Sh <- (MM^2)/VV
Sc <- VV/MM

# PRE-LOOP EFFICIENCIES
tau.sq        <- rinvgamma(1,shape=a1,scale=b1)
tau           <- sqrt(tau.sq)
sigma.sq      <- 1
sigma         <- sqrt(sigma.sq)
sigma.beta.sq <- sigma.sq * d
p             <- length(hat.beta)     
aa            <- chol(solve(t(X) %*% X + diag(1/sigma.beta.sq, nrow=ncol(X))))
SIG           <- t(X)%*%X + diag(1/sigma.beta.sq, nrow=ncol(X))				# VAR/COVAR MATRIX
SIG.inv       <- solve(SIG)								# INVERT HERE FOR EFFICIENCY
Z             <- rep(NA,length=n)							# Z TO FILL-IN DURING LOOPING
beta          <- rmvnorm(1, mean=hat.beta, sigma=diag(hat.var))				# STARTING VALUES FROM ABOVE
beta          <- rbind( beta,matrix(rep(NA,num.reps*ncol(beta)),ncol=ncol(beta)) )	# MATRIX TO FILL IN DURING LOOP
n.i           <- rep(1,n);   								# LOOP NEEDS REP OF n'S
q             <- rDirichlet.acomp(1, alpha=n.i)						# PROBABILITY OF ASSIGNMENT
A.n 	      <- sample(1:n,n,replace=TRUE,q)				 		# CREATES THE ASSIGNMENTS.
A.K 	      <- table(A.n)						 		# CREATES THE LIST OF OCCUPIED
A.K.labels    <- as.numeric(names(A.K))  						# LOCATIONS OF OCCUPIED
K	      <- length(A.K)								# NUMBER OF OCCUPIED TABLES
B             <- rep(0,length=n)							# LENGTH n FOR ALL CASES
B[A.K.labels] <- 1									# 1 WHERE OCCUPIED >0
B             <- B*rnorm(n,0,tau)							# ASSIGN psi VALUES TO OCCUPIED
nk            <- rep(0,n); for (i in 1:n) nk[A.n[i]] <- nk[A.n[i]] + 1			# COUNTS OCCUPANTS AT TABLES
psi           <- B[A.n] 								# MAP TABLE psi'S TO CASES
like.K        <- 0									# LOG-LIKE STARTS AT ZERO
X.beta1       <- X%*%beta[1,]								# MULTIPLY X AND beta IN ADVANCE
for (i in 1:n)  
    like.K <- like.K + Y[i] * pnorm( X.beta1[i] + psi[i],log=TRUE) + 
		(1-Y[i]) * (1 - pnorm( X.beta1[i] + psi[i] )) + 
		dnorm(psi[i], mean=0, sd=tau,log=TRUE)
like.K <- exp(like.K + sum(lgamma(A.K)))

# SETUP TO SAVE GIBBS SAMPLER VALUES
tau.sq.post <- psi.post <- beta.post <- xi.post <- K.save <- like.K.save <- m.save <- NULL 

####################################################################################################
# NEW POSTERIOR FOR m FUNCTION
####################################################################################################
initial.m <- m <-10 ; #initial value of m
like      <- function(m.v){gamma(m.v)/gamma(m.v+n) * dgamma(m.v, shape=Sh, scale=Sc) * m.v^K}
loglike   <- function(m.v){lgamma(m.v)-lgamma(m.v+n) + dgamma(m.v, shape=Sh, scale=Sc, log=TRUE) + K*log(m.v)}
loglike.s <- function(m.v){lgamma(m.v)-lgamma(m.v+n) + dgamma(m.v, shape=Sh, scale=Sc, log=TRUE) + K*log(m.v) + nu*log(m.v)}

####################################################################################################
# CALL GIBBS FUNCTION
####################################################################################################
cat(c("Job started at:",date()),fill=T)
source("/Users/jgill/Article.Dirichlet/R-GLMDM/Probit/glmdm_probit2b.R")
#source("glmdm_probit2b.R")
cat(c("Job finished at:",date()))

scotland2.beta <- beta
save.image()



#######################################
# Histogram of K and m
#######################################
#K.est <- c()
#for (l in 1:length(m.save)){
#   K.est[l] <- m.save[l]*sum(1/seq(m.save[l],m.save[l]+n-1,by=1))
#	}
#postscript("Hist of K for probit.ps")	
#hist(K.est,main="Histogram of the estimated K")
#dev.off()
#
#postscript("Hist of m for probit.ps")
#hist(m.save,main="Histogram of the estimated m")
#dev.off()
########################################
## Plots for Betas
#########################################
#beta.cumsum <- apply(beta,2,cumsum)/c(1:nrow(beta))
#ts.plot(beta.cumsum, gpars=list(xlab="time", ylab="beta", lty=c(1:3)), main="Trasition Plot of Beta from GLMDM")
#legend(10,25,c("beta0","beta1","beta2"),lty=c(1:3))
#
#postscript("Trace of betas for probit.ps")
#par(mfrow=c(3,1))
#ts.plot(beta[,1],col=1)
#ts.plot(beta[,2],col=2)
#ts.plot(beta[,3],col=3)
#dev.off()

#beta.post <- beta[502:1001,]
#mean.beta <- apply(beta.post,2,mean)
#sd.beta <- apply(beta.post,2,sd)
#
#postscript("Hist of betas for probit.eps")
#par(mfrow=c(2,2))
#hist(beta.post[,1],main="Histogram of the estimated beta0")
#hist(beta.post[,2],main="Histogram of the estimated beta1")
#hist(beta.post[,3],main="Histogram of the estimated beta2")
#dev.off()
#
####################################
## Plots for tau^2
###################################
##ts.plot(tau.sq.post)
#
#postscript("Hist of tau for probit.eps")
#tau.sq <- tau.sq.post[501:1000]
#hist(tau.sq, main="Histogram of the estimated tau^2")
#dev.off()
#
#####################################
## Autocorrelation Plots of betas
#####################################
#postscript("ACF plots of probit.eps")
#acf(beta)
#dev.off()
#
#postscript("ACF of betas_probit.ps")
#par(mfrow=c(1,3))
#acf(beta[,1], main="ACF of beta0 of Probit Model from Gibbs Sampler")
#acf(beta[,2], main="ACF of beta1 of Probit Model from Gibbs Sampler")
#acf(beta[,3], main="ACF of beta2 of Probit Model from Gibbs Sampler")
#dev.off()
